from pypinyin import lazy_pinyin, Style
from tqdm.auto import tqdm


def 威妥玛拼音(词语: str) -> str:
    """Romanize ``词语`` using pypinyin's ``Style.WADEGILES``.

    Args:
        词语: The text to convert.

    Returns:
        The items produced by ``lazy_pinyin`` for ``词语``, joined into a
        single string with one space between consecutive items.
    """
    return " ".join(lazy_pinyin(词语, style=Style.WADEGILES))


# Convert every entry of the phrase dictionary and write one
# "phrase:romanization" line per entry.
#
# The input is opened before the output so that a missing source file does not
# leave a freshly truncated output file behind.  Both files are opened
# explicitly as UTF-8 (assumes the data files are UTF-8 -- TODO confirm):
# without it, open() falls back to the platform's locale encoding, which may
# be unable to decode or encode the non-ASCII text handled here.
with open("nugine_solutions/data/large_pinyin.txt", encoding="utf-8") as src:
    with open("nugine_solutions/data/wadegiles_phrases.txt", "w", encoding="utf-8") as dst:
        # Stream the file line by line rather than loading it all through
        # readlines(); `total` is the hard-coded expected line count that the
        # progress bar is sized with.
        for line in tqdm(src, total=411959):
            # Comment lines and blank lines carry no entry.
            if line.startswith("#") or not line.strip():
                continue
            # Only the text before the first ": " is used; whatever follows it
            # is discarded.  A line lacking the separator still raises
            # ValueError instead of being silently dropped.
            词语, _ = line.split(": ", 1)
            拼音 = 威妥玛拼音(词语)
            dst.write(f"{词语}:{拼音}\n")

# Convert every entry of the single-character table and write one
# "character:romanization" line per entry.
#
# The input is opened before the output so that a missing source file does not
# leave a freshly truncated output file behind.  Both files are opened
# explicitly as UTF-8 (assumes the data files are UTF-8 -- TODO confirm):
# without it, open() falls back to the platform's locale encoding, which may
# be unable to encode the characters produced by chr() below.
with open("nugine_solutions/data/pinyin.txt", encoding="utf-8") as src:
    with open("nugine_solutions/data/wadegiles_chars.txt", "w", encoding="utf-8") as dst:
        # Stream the file line by line rather than loading it all through
        # readlines(); `total` is the hard-coded expected line count that the
        # progress bar is sized with.
        for line in tqdm(src, total=41653):
            # Comment lines and blank lines carry no entry.
            if line.startswith("#") or not line.strip():
                continue
            # Only the text before the first ": " is used; whatever follows it
            # is discarded.  A line lacking the separator still raises
            # ValueError instead of being silently dropped.
            codepoint, _ = line.split(": ", 1)
            # Drop the two-character prefix (presumably "U+" -- verify against
            # the data file) and parse the remainder as a hexadecimal code point.
            字符 = chr(int(codepoint[2:], 16))
            拼音 = 威妥玛拼音(字符)
            dst.write(f"{字符}:{拼音}\n")
